View Code/Details
nusmodsnusmods API at https://nusmods.com/api/.# load bidding data
# calculate loading times
before <- Sys.time()
# read data directly from URL
myjson <- fromJSON(file = url("https://api.nusmods.com/corsBiddingStatsRaw.json"))
# create empty dataframe which will act as a container to be populated with data
myBid <- data.frame()
# for each element in the myjson list, append it to myBid
for(r in 1:length(myjson))
{
if(myjson[[r]]$Semester == 1 | myjson[[r]]$Semester == 2)
{
myBid <- rbind(myBid, myjson[[r]])
}
myjson[[r]] <- NA
}
# calculate loading time
after <- Sys.time()
after - before
# save
saveRDS(myBid, file = "myBid.RDS")myBid.RDS# create empty dataframe which will act as a container to be populated with data
myModInfo <- data.frame()
# looping through each year
for(year in c(2011:2018))
{
for(semester in c(1,2))
{
# create the url where data is to be extracted from
myurl <- paste0("https://api.nusmods.com/", year, "-", year + 1, "/", semester, "/moduleTimetableDeltaRaw.json")
myjson <- fromJSON(file = url(myurl))
# for each element in the myjson list, append it to myModInfo
for(r in 1:length(myjson))
{
if(isTRUE(str_detect(myjson[[r]]$ModuleCode, "^PL")))
{
if(myjson[[r]]$Semester == 1 | myjson[[r]]$Semester == 2)
{
myModInfo <- rbind(myModInfo, myjson[[r]])
}
}
myjson[[r]] <- NA
}
cat(year, "Semester", semester, "Done!")
}
}
# save
saveRDS(myModInfo, file = "myModInfo.RDS")myModInfo.RDSView Code/Details
myModInfomyModInfo.
# only keep the Psychology modules information
myModInfo <- subset(myModInfo,
str_detect(myModInfo$ModuleCode, "^PL"))# remove information about tutorials
myModInfo <- subset(myModInfo,
myModInfo$LessonType != "TUTORIAL")# only keep these columns
myModInfo <- myModInfo[,grep("ModuleCode|DayText|StartTime|Semester|AcadYear", names(myModInfo))]
# remove duplicated rows based on columns of ModuleCode, Acadyear, Semester, StartTime and DayText
myModInfo <- distinct(myModInfo,
ModuleCode, AcadYear, Semester, StartTime, DayText)myBidmyBid.
# remove non-psychology modules
myBid <- subset(myBid,
# only keep rows where module code begins with PL
str_detect(myBid$ModuleCode, "^PL"))
# also remove Roots and Wings (PLS8001) and psychology for non-psych students (PLB1201)
myBid <- subset(myBid,
!str_detect(myBid$ModuleCode, "PLS|PLB"))
# remove the rounds where it was reserved
myBid <- subset(myBid,
!str_detect(myBid$StudentAcctType, "Reserved"))
# remove unneeded columns
myBid <- myBid[, -grep("Group|Faculty", names(myBid))]Level that denotes whether the module is Level 1, 2, 3 or 4.BpQ that represents Bids per Quota, which is the number of bidders for each available quota of the module, derived from Bidders and Quota. Used as a measure of the popularity of a module, Higher BpQ signifies greater popularity.LessonTime that denotes whether the lecture begins in the morning (before 12pm), in the afternoon (12pm to 4pm), in the evening (after 4pm).myModInfo to myBid.# create new column that indicates the level of the module, based on their module code
myBid$Level <- ifelse(str_detect(myBid$ModuleCode, "1[0-9][0-9][0-9]"), "Level 1",
ifelse(str_detect(myBid$ModuleCode, "2[0-9][0-9][0-9]"), "Level 2",
ifelse(str_detect(myBid$ModuleCode, "3[0-9][0-9][0-9]"), "Level 3",
ifelse(str_detect(myBid$ModuleCode, "4[0-9][0-9][0-9]"), "Level 4",
"Graduate Module"))))
# crosstabs to doublecheck
# xtabs( ~ ModuleCode + Level,
# data = myBid, subset = NULL)# create new column Bids Per Quota (BpQ)
myBid$BpQ <- as.numeric(myBid$Bidders)/as.numeric(myBid$Quota)# transform these columns to numeric
for(r in c("Quota", "Bidders", "LowestBid", "LowestSuccessfulBid", "HighestBid", "StartTime"))
{
mydata[,grep(r, names(mydata))] <- as.numeric(mydata[,grep(r, names(mydata))])
}
# transform these columns to factors
for(r in c("AcadYear", "Semester", "ModuleCode", "Round", "Level", "StudentAcctType", "DayText", "LessonTime"))
{
mydata[,grep(r, names(mydata))] <- factor(mydata[,grep(r, names(mydata))])
}# create vector of the column names which are factors
facnames <- names(select_if(mydata, is.factor))
# factor names without ModuleCode and StudentAcctType
facnames.mod <- facnames[-grep("ModuleCode|StudentAcctType", facnames)]
# create vector of the column names which are numeric
numnames <- names(select_if(mydata, is.numeric))
# numeric names without StartTime
numnames.time <- names(select_if(mydata, is.numeric))[-grep("StartTime", numnames)]DayText LevelsLessonTime LevelsBidders is calculated across all academic years, all bidding rounds, all modules…## ModuleCode AcadYear Semester Round Quota Bidders LowestBid LowestSuccessfulBid HighestBid StudentAcctType Level BpQ StartTime DayText LessonTime
## PL1101E: 324 2013/2014:493 1:1449 1A:634 Min. : 1.00 Min. : 0.0 Min. : 0.00 Min. : 0.0 Min. : 0.0 New Students [P] : 314 Level 1: 324 Min. : 0.00000 Min. : 800 Monday :479 Morning : 723
## PL3232 : 124 2015/2016:485 2:1380 1B:389 1st Qu.: 4.00 1st Qu.: 1.0 1st Qu.: 1.00 1st Qu.: 1.0 1st Qu.: 1.0 NUS Students [G] : 141 Level 2: 169 1st Qu.: 0.02326 1st Qu.:1100 Tuesday :588 Afternoon:1834
## PL3236 : 112 2014/2015:439 1C:273 Median : 15.00 Median : 3.0 Median : 1.00 Median : 1.0 Median : 400.0 NUS Students [P, G] : 327 Level 3:1520 Median : 0.33333 Median :1300 Wednesday:688 Evening : 272
## PL3234 : 110 2016/2017:366 2A:410 Mean : 25.95 Mean : 13.2 Mean : 70.16 Mean : 258.7 Mean : 732.7 NUS Students [P] : 331 Level 4: 816 Mean : 1.05331 Mean :1305 Thursday :684
## PL3235 : 109 2012/2013:350 2B:463 3rd Qu.: 32.00 3rd Qu.: 10.0 3rd Qu.: 8.00 3rd Qu.: 271.0 3rd Qu.:1237.0 Returning Students [P] :1191 3rd Qu.: 1.30000 3rd Qu.:1500 Friday :390
## PL3233 : 107 2011/2012:242 3A:366 Max. :430.00 Max. :440.0 Max. :2430.00 Max. :3459.0 Max. :4801.0 Returning Students [P] and NUS Students [G]: 155 Max. :18.00000 Max. :1900
## (Other):1943 (Other) :454 3B:294 Returning Students and New Students [P] : 370
View Histograms
# plot the categorical variables
# note: I did not include ModuleCode in this exploratory graph because it has too many levels (83)
for(r in facnames.mod)
{
cat(paste0("Histogram Of ", r))
plot(
ggplot(data = mydata, aes_string(x = r, fill = r)) +
geom_histogram(stat = "count") +
ylab("Count") +
ggtitle(paste0("Count of ", r)) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, size = 6, vjust = -0.3),
axis.title.x = element_blank())
)
}## Histogram Of AcadYear
## Histogram Of Semester
## Histogram Of Round
## Histogram Of Level
## Histogram Of DayText
## Histogram Of LessonTime
# plot the continuous variables
for(r in numnames)
{
cat(paste0("Histogram Of ", r))
plot(
ggplot(data = mydata, aes_string(x = r, fill = r)) +
geom_histogram(bins = 90, fill = "violetred") +
ylab("Histogram") +
ggtitle(paste0("Frequency of ", r)) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, size = 6, vjust = -0.3),
axis.title.x = element_text())
)
}## Histogram Of Quota
## Histogram Of Bidders
## Histogram Of LowestBid
## Histogram Of LowestSuccessfulBid
## Histogram Of HighestBid
## Histogram Of BpQ
## Histogram Of StartTime
View Categorical-Categorical Heatmaps
# create vector to loop across
for(r in 1:length(facnames.mod))
{
for(i in 1:length(facnames.mod))
{
# dont do anything if they are the same or the graph has been made before
if(i == r | i < r)
{
} else {
cat(paste0(facnames.mod[r]," ~ ",facnames.mod[i]))
# create formula for xtabs
tempform <- paste0("~ ", facnames.mod[r], " + ", facnames.mod[i])
# temp is a dataframe that is only going to exist in this section
# and overwritten with each loop
temp <- as.data.frame(xtabs(eval(parse(text = tempform)),
data = mydata,
subset = NULL))
plot(
ggplot(data = temp, aes_string(x = facnames.mod[r], y = facnames.mod[i], fill = "Freq", label = "Freq")) +
geom_tile() +
geom_text() +
scale_fill_gradient(low = "white", high = "violetred") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = -0.3),
legend.position = "none")
)
}
}
}## AcadYear ~ Semester
## AcadYear ~ Round
## AcadYear ~ Level
## AcadYear ~ DayText
## AcadYear ~ LessonTime
## Semester ~ Round
## Semester ~ Level
## Semester ~ DayText
## Semester ~ LessonTime
## Round ~ Level
## Round ~ DayText
## Round ~ LessonTime
## Level ~ DayText
## Level ~ LessonTime
## DayText ~ LessonTime
View Continuous-Continuous Scatterplots
for(r in 1:length(numnames))
{
for(i in 1:length(numnames))
{
# dont do anything if they are the same or the graph has been made before
if(i == r | i < r)
{
} else {
cat(paste0(numnames[r]," ~ ",numnames[i]))
# create formula for lm()
tempform.std <- paste0("scale(", numnames[i],")", " ~ ", "scale(", numnames[r], ")")
tempform <- paste0(numnames[i], " ~ ", numnames[r])
# regress to get best fit line
# standardized
stdreg <- lm(eval(parse(text = tempform.std)),
data = mydata)
# unstandardized
reg <- lm(eval(parse(text = tempform)),
data = mydata)
plot(
ggplot(data = mydata, aes_string(x = numnames[r], y = numnames[i])) +
geom_point(color = "violetred", size = 2, alpha = 0.3) +
theme_classic() +
geom_abline(slope = reg$coefficients[2], intercept = reg$coefficients[1], lty = "dashed") +
geom_label(aes(x = Inf, y = Inf, label = paste0("Standardized Regression Coefficient = ",
round(stdreg$coefficients[2],3)),
hjust = 1, vjust = 2)) +
theme(axis.text.x = element_text(angle = 90, vjust = -0.3))
)
}
}
}## Quota ~ Bidders
## Quota ~ LowestBid
## Quota ~ LowestSuccessfulBid
## Quota ~ HighestBid
## Quota ~ BpQ
## Quota ~ StartTime
## Bidders ~ LowestBid
## Bidders ~ LowestSuccessfulBid
## Bidders ~ HighestBid
## Bidders ~ BpQ
## Bidders ~ StartTime
## LowestBid ~ LowestSuccessfulBid
## LowestBid ~ HighestBid
## LowestBid ~ BpQ
## LowestBid ~ StartTime
## LowestSuccessfulBid ~ HighestBid
## LowestSuccessfulBid ~ BpQ
## LowestSuccessfulBid ~ StartTime
## HighestBid ~ BpQ
## HighestBid ~ StartTime
## BpQ ~ StartTime
View Continuous-Continuous Correlation Matrix
corrplot.mixed(cor(mydata[,grep(paste0(numnames.time, collapse = "|"), names(mydata))]),
upper = "color",
tl.pos = "lt",
tl.cex = 0.5,
cl.cex = 0.5)View Continuous-Categorical Boxplots
for(r in facnames.mod)
{
for(i in numnames)
{
cat(paste0(r," ~ ",i))
# graph
plot(
ggplot(data = mydata, aes_string(x = r, y = i, fill = r)) +
geom_boxplot() +
theme_classic() +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, vjust = -0.3))
)
}
}## AcadYear ~ Quota
## AcadYear ~ Bidders
## AcadYear ~ LowestBid
## AcadYear ~ LowestSuccessfulBid
## AcadYear ~ HighestBid
## AcadYear ~ BpQ
## AcadYear ~ StartTime
## Semester ~ Quota
## Semester ~ Bidders
## Semester ~ LowestBid
## Semester ~ LowestSuccessfulBid
## Semester ~ HighestBid
## Semester ~ BpQ
## Semester ~ StartTime
## Round ~ Quota
## Round ~ Bidders
## Round ~ LowestBid
## Round ~ LowestSuccessfulBid
## Round ~ HighestBid
## Round ~ BpQ
## Round ~ StartTime
## Level ~ Quota
## Level ~ Bidders
## Level ~ LowestBid
## Level ~ LowestSuccessfulBid
## Level ~ HighestBid
## Level ~ BpQ
## Level ~ StartTime
## DayText ~ Quota
## DayText ~ Bidders
## DayText ~ LowestBid
## DayText ~ LowestSuccessfulBid
## DayText ~ HighestBid
## DayText ~ BpQ
## DayText ~ StartTime
## LessonTime ~ Quota
## LessonTime ~ Bidders
## LessonTime ~ LowestBid
## LessonTime ~ LowestSuccessfulBid
## LessonTime ~ HighestBid
## LessonTime ~ BpQ
## LessonTime ~ StartTime
## [1] PL1101E PL2131 PL2132 PL3232 PL3233 PL3234 PL3235 PL3236 PL3237 PL3238 PL3239 PL3240 PL3241 PL3242 PL3243 PL3244 PL3248 PL3249 PL3250 PL3251 PL3252 PL3253 PL3254 PL3255 PL3256 PL3257 PL3258 PL3259 PL3260 PL3261 PL3281 PL3281A PL3281B PL3281C PL3281D PL3282 PL3282A PL3282C PL3283 PL3283A PL3283B PL3284 PL3285 PL3286 PL3287 PL3288 PL3289 PL4201 PL4202 PL4203 PL4205 PL4206 PL4207 PL4208 PL4213 PL4214 PL4217 PL4218 PL4219 PL4220 PL4221 PL4222 PL4223 PL4224 PL4225 PL4226 PL4227 PL4228 PL4229 PL4230 PL4231 PL4232 PL4233 PL4234 PL4235 PL4237 PL4238 PL4239 PL4240 PL4241 PL4880F PL4880G PL4880H PL4880I PL4880J PL4880K PL4880L PL4880N PL4880P PL4880Q PL4880R
## Levels: PL1101E PL2131 PL2132 PL3232 PL3233 PL3234 PL3235 PL3236 PL3237 PL3238 PL3239 PL3240 PL3241 PL3242 PL3243 PL3244 PL3248 PL3249 PL3250 PL3251 PL3252 PL3253 PL3254 PL3255 PL3256 PL3257 PL3258 PL3259 PL3260 PL3261 PL3281 PL3281A PL3281B PL3281C PL3281D PL3282 PL3282A PL3282C PL3283 PL3283A PL3283B PL3284 PL3285 PL3286 PL3287 PL3288 PL3289 PL4201 PL4202 PL4203 PL4205 PL4206 PL4207 PL4208 PL4213 PL4214 PL4217 PL4218 PL4219 PL4220 PL4221 PL4222 PL4223 PL4224 PL4225 PL4226 PL4227 PL4228 PL4229 PL4230 PL4231 PL4232 PL4233 PL4234 PL4235 PL4237 PL4238 PL4239 PL4240 PL4241 PL4880F PL4880G PL4880H PL4880I PL4880J PL4880K PL4880L PL4880N PL4880P PL4880Q PL4880R
## ModuleCode
## PL1101E PL2131 PL2132 PL3232 PL3233 PL3234 PL3235 PL3236 PL3237 PL3238 PL3239 PL3240 PL3241 PL3242 PL3243 PL3244 PL3248 PL3249 PL3250 PL3251 PL3252 PL3253 PL3254 PL3255 PL3256 PL3257 PL3258 PL3259 PL3260 PL3261 PL3281 PL3281A PL3281B PL3281C PL3281D PL3282 PL3282A PL3282C PL3283 PL3283A PL3283B PL3284 PL3285 PL3286 PL3287 PL3288 PL3289 PL4201 PL4202 PL4203 PL4205 PL4206 PL4207 PL4208 PL4213 PL4214 PL4217 PL4218 PL4219 PL4220 PL4221 PL4222 PL4223 PL4224 PL4225 PL4226 PL4227 PL4228 PL4229 PL4230 PL4231 PL4232 PL4233 PL4234 PL4235 PL4237 PL4238 PL4239 PL4240 PL4241 PL4880F PL4880G PL4880H PL4880I PL4880J PL4880K PL4880L PL4880N PL4880P PL4880Q PL4880R
## 324 84 85 124 107 110 109 112 49 35 41 37 50 46 14 29 24 21 27 16 29 3 36 21 17 17 21 15 4 50 45 44 10 11 40 39 13 6 2 6 22 37 6 30 22 22 1 23 37 43 36 43 19 13 22 20 13 26 24 5 39 30 13 20 2 18 24 16 20 7 17 5 3 22 40 43 11 2 4 5 15 15 6 6 18 15 33 4 7 4 28
# datatable(myBid, filter = "top")
aggregate(BpQ ~ AcadYear + Semester + ModuleCode,
data = mydata,
FUN = mean)